
import os
import pandas as pd
import numpy as np
from os.path import exists, expanduser


# Directory the script is launched from, and the current user's home directory.
programdir = os.getcwd()
my_home = expanduser("~")

# Root of the replication package: set to the directory where the replication files are saved.
datadir = os.path.join(
    my_home,
    'Dropbox',
    'Entry Technology',
    'JPE macro submission',
    'Replication Script',
)

# Destination CSV for the stacked CBP master table written at the end of this script.
sname = os.path.join(
    datadir,
    'raw data',
    'Census_CBP',
    'cbp_1986_2020.csv',
)


# Build a year-by-state employment panel from the yearly CBP state files
# ("cbp<YY>st.txt" under <datadir>/raw data/Census_CBP) and save it to `sname`.
year_bds = 2020  # last year (inclusive) to read
cbp_frames = []  # one filtered frame per year; concatenated once after the loop
for year_i in range(1986, year_bds + 1):
    # Industry column name and the code value to keep: 'sic' through 1997,
    # 'naics' afterwards.
    # NOTE(review): '----' / '------' are presumably the all-industry total rows -- confirm.
    if year_i > 1997:
        indv, indc = 'naics', '------'
    else:
        indv, indc = 'sic', '----'

    year_i_abbrev = str(year_i)[2:4]  # two-digit year used in the file name
    filepath_i = os.path.join(datadir, 'raw data', 'Census_CBP', "cbp" + year_i_abbrev + "st.txt")
    df_cbp_i = pd.read_csv(filepath_i)

    if year_i < 2010:
        # Years without an LFO variable: filter on the industry code only.
        print(year_i)
        print(indv)
        keep_rows = df_cbp_i[indv] == indc
    else:
        # Years with an LFO variable (2010-year_bds): normalise the header to
        # lower case, then also require lfo == "-".
        df_cbp_i.columns = df_cbp_i.columns.str.lower()
        keep_rows = (df_cbp_i[indv] == indc) & (df_cbp_i['lfo'] == "-")

    # Keep only the wanted columns. The explicit copy makes the frame independent
    # of the raw data, so adding 'year' does not raise SettingWithCopyWarning.
    df_cbp_i = df_cbp_i.loc[keep_rows, ['fipstate', 'emp']].copy()
    # Put 'year' first so the column order is year, fipstate, emp.
    df_cbp_i.insert(0, 'year', year_i)

    if not cbp_frames:
        print("CBP master data set created from " + str(year_i) + " data.")
    else:
        print(str(year_i) + " data added to CBP master.")
        print(df_cbp_i)
    cbp_frames.append(df_cbp_i)

# DataFrame.append was removed in pandas 2.0; a single concat is the supported
# replacement and avoids re-copying the master frame on every iteration.
df_cbp_master = pd.concat(cbp_frames, ignore_index=True)
df_cbp_master = df_cbp_master.rename(columns={'fipstate': 'st', 'emp': 'emp_cbp'})

df_cbp_master.to_csv(sname, index=False)
